/* Cleaning BOS data. */


#delimit ;
/* Session setup: commands end at a semicolon from here on. Suppress the
   -more- pauses and start from an empty workspace. */
set more off;
clear all;
/* Stem of the output dataset. The visible code does not reference this
   macro: the final save spells the file name out literally. */
local outfile "GWgap_compet_v3";







*2******************************************************************************;
* Reading in BOS competition data and labelling variables;
*******************************************************************************;

/* Time-stamp the log. */
display _newline "$S_DATE $S_TIME";

/* Load the raw extract. The first row holds the variable names and their
   case is kept as is. */
import delimited "Wgap_compet_v3.csv", varnames(1) case(preserve) clear;

/* Discard rows with an empty enterprise_nbr and rows whose final_weight is zero. */
drop if enterprise_nbr=="" | final_weight==0;

compress;

/* Convert to numeric. With force, entries that are not numbers become missing
   instead of stopping the conversion. */
destring fte WP rme_no_WP, replace force;

/* These columns are not used again below. */
drop selection adjusted enterprise_nbr;

/* Firm-size flags. Missing values are excluded explicitly because Stata
   orders them above every number. */
generate fte_10plus = !missing(fte) & fte>=10;
label variable fte_10plus "Firm has 10+ FTEs";
label variable fte "Firm FTEs";

generate hc_5plus = !missing(rme_no_WP) & rme_no_WP>=5;
label variable hc_5plus "Firm has head count 5+, excl WP ";

label variable pent "Pent";

/* The year is whatever remains of dim_year_key after dropping its last two digits. */
generate year = floor(dim_year_key/100);
label variable year "Year";
drop dim_year_key;

/* Competition. */
label variable comp "Level of competition faced by the firm";
notes comp: Question: How would you describe this business' competition?;
label define comp
	0 "no response"
	1 "no competition"
	2 "1-2 competitors"
	3 "many competitors, several dominant"
	4 "many competitors, none dominant"
	5 "don't know";
label values comp comp;

/* Self-assessed productivity and profitability relative to competitors. */
label define rel
	0 "no response"
	1 "lower than"
	2 "on a par with"
	3 "higher than"
	4 "don't know";

label variable prody "Firm's relative productivity";
notes prody: Question: How do you think this business compares to its major 
competitors on each of the following? Productivity; 
label values prody rel;

label variable profity "Firm's relative profitability";
notes profity: Question: How do you think this business compares to its major 
competitors on each of the following? Profitability;
label values profity rel;

/* Recruitment difficulty by occupational group. */
label define rec
	0 "no response"
	1 "no difficulty"
	2 "moderate difficulty"
	3 "severe difficulty"
	4 "not applicable"
	5 "don't know";

label variable rec_manag "Difficulty recruiting managers and professionals";
label variable rec_tech "Difficulty recruiting technicians and associate professionals";
label variable rec_trad "Difficulty recruiting tradespeople and related workers";
label variable rec_other "Difficulty recruiting other occupations";
label values rec_* rec;
notes: Difficulty recruiting question is: Over the last financial year, to what extent
did this business experience difficulty in recruiting new staff for any of the following 
occupational groups?;

/* Collective employment agreement coverage (banded). */
label variable collec_emp "Percentage of employees with collective employment agreement";
label define collec_emp
	0 "missing"
	1 "zero"
	2 "10% or less"
	3 "50% or less"
	4 "90% or less"
	5 "91-100%"
	6 "don't know";
label values collec_emp collec_emp;

/* Employment by occupational group. */
label variable emp_manag "Managers and professionals employed";
label variable emp_tech "Technicians and associate professionals employed";
label variable emp_trad "Tradespeople and related workers employed";
label variable emp_other "Other occupations employed";

/* Foreign ownership. The value 999 in perc_internat is turned into missing. */
label variable any_internat "Whether any foreign ownership";
label define any_internat 0 "missing" 1 "no" 2 "yes" 3 "don't know";
label values any_internat any_internat;

label variable perc_internat "Percentage of foreign ownership";
replace perc_internat = . if perc_internat==999;


/* Attach the industry classification from anz_map. Firms without a match are
   kept (only rows found solely in anz_map are discarded).
   NOTE(review): unmatched firms carry no industry code into the later
   collapse by year and pf_ind - confirm that is intended. */
merge m:1 anz06_4d using anz_map, nogenerate keep(master match);

rename nzsioc_4dig pf_ind;

/* Pool every code that starts with one of these prefixes into the prefix itself. */
foreach grp in CC1 CC3 CC5 CC7 DD1 {;
	replace pf_ind = "`grp'" if substr(pf_ind,1,3)=="`grp'";
};

/* KK11 and KK12 are combined under a single code. */
replace pf_ind = "KK1_" if inlist(pf_ind, "KK11", "KK12");

label variable pf_ind "Industry";





*3******************************************************************************;
* Creating variables to collapse;
*******************************************************************************;

/* Firm-level measures that are averaged by industry and year further down.
   Indicators without a trailing 2 are defined for every firm, so a firm
   without a valid answer counts as 0. The variants ending in 2, and the avg_
   variables, are missing unless the answer is one of the valid codes.
   The order in which the variables are created matters: later code selects
   them with the range low_comp - pos_internat. */

/* Competition: valid answers are codes 1 to 4. */
generate low_comp = inlist(comp, 1, 2);
label variable low_comp "No competition, or 1-2 competitors";

generate low_comp2 = low_comp if inrange(comp, 1, 4);
label variable low_comp2 "No competition, or 1-2 competitors, among valid responses";

generate vlow_comp = comp==1;
label variable vlow_comp "No competition";

generate vlow_comp2 = comp==1 if inrange(comp, 1, 4);
label variable vlow_comp2 "No competition, among valid responses";

generate mlow_comp = comp==2;
label variable mlow_comp "1-2 competitors";

generate mlow_comp2 = comp==2 if inrange(comp, 1, 4);
label variable mlow_comp2 "1-2 competitors, among valid responses";

generate high_comp = comp==4;
label variable high_comp "Many competitors, none dominant";

generate high_comp2 = high_comp if inrange(comp, 1, 4);
label variable high_comp2 "Many competitors, none dominant, among valid responses";

generate avg_comp = comp if inrange(comp, 1, 4);
label variable avg_comp "Level of competition, among valid responses";


/* Relative productivity and profitability: valid answers are codes 1 to 3. */
local perf_prody "productivity";
local perf_profity "profitability";

foreach perf in prody profity {;

	local what "`perf_`perf''";

	generate high_`perf' = `perf'==3;
	label variable high_`perf' "Higher `what' than competitors";

	generate high_`perf'2 = high_`perf' if inrange(`perf', 1, 3);
	label variable high_`perf'2 "Higher `what' than competitors, among valid responses";

	generate low_`perf' = `perf'==1;
	label variable low_`perf' "Lower `what' than competitors";

	generate low_`perf'2 = low_`perf' if inrange(`perf', 1, 3);
	label variable low_`perf'2 "Lower `what' than competitors, among valid responses";

	generate avg_`perf' = `perf' if inrange(`perf', 1, 3);
	label variable avg_`perf' "Relative level of `what', among valid responses";
};


/* Recruitment difficulty: valid answers are codes 1 to 3. */
local occ_manag "managers and professionals";
local occ_tech "technicians and associate professionals";
local occ_trad "tradespeople and related workers";
local occ_other "other occupations";

foreach occ in manag tech trad other {;

	local who "`occ_`occ''";

	generate avg_rec_`occ' = rec_`occ' if inrange(rec_`occ', 1, 3);
	label variable avg_rec_`occ' "Level of difficulty recruiting `who', among valid responses";

	generate drec_`occ' = inlist(rec_`occ', 2, 3) if inrange(rec_`occ', 1, 3);
	label variable drec_`occ' "Any difficulty recruiting `who', among valid responses";

	generate srec_`occ' = rec_`occ'==3 if inrange(rec_`occ', 1, 3);
	label variable srec_`occ' "Severe difficulty recruiting `who', among valid responses";
};

/* Collective agreement coverage: each band is replaced by a percentage
   (the note below records that these are band midpoints); codes 0 and 6
   become missing. */
recode collec_emp (0 6 = .) (1 = 0) (2 = 5) (3 = 30) (4 = 70) (5 = 95), gen(avg_collec_emp);
label variable avg_collec_emp "Average % employees in collective emp agreements, among valid responses";
notes avg_collec_emp: Averages generated from band midpoints;

generate pos_collec_emp = inrange(collec_emp, 2, 5) if inrange(collec_emp, 1, 5);
label variable pos_collec_emp "Positive % of employees in collective emp agreements, among valid responses";

/* Foreign ownership. */
generate frac_internat = perc_internat/100;
label variable frac_internat "Fraction foreign-owned";

/* Code 1 (no) becomes 0, code 2 (yes) becomes 1, anything else is missing. */
generate pos_internat = any_internat - 1 if inlist(any_internat, 1, 2);
label variable pos_internat "Dummy for any international ownership";


save temp, replace;




*4******************************************************************************;
* Collapsing for all firms or firms with 10+ FTE or 5+ head count;
*******************************************************************************;

/* For each sample (all firms, firms with fte_10plus==1, firms with
   hc_5plus==1) this builds one row per industry and year holding weighted
   averages of every measure from low_comp through pos_internat, under three
   weightings: final_weight alone, fte x final_weight, and
   rme_no_WP x final_weight. Each firm contributes
       value x weight / (sum of weights over firms with a non-missing value)
   so the sum of the contributions within a cell is the weighted mean.

   collapse (sum) reports 0, not missing, for a cell in which every
   contribution is missing. Contribution counts (nv_*) are therefore summed
   alongside the measures, and any average that no firm contributed to is
   reset to missing after the collapse.

   Results are saved as temp_all, temp_lge and temp_lg5. */

foreach samp in all large large5 {;

	use temp, clear;
	unab meas: low_comp - pos_internat;

	if "`samp'"=="all" {;
		local suff "_all";
	};
	if "`samp'"=="large" {;
		keep if fte_10plus==1;
		local suff "_lge";
	};
	if "`samp'"=="large5" {;
		keep if hc_5plus==1;
		local suff "_lg5";
	};

	gen fte_final_weight = fte*final_weight;
	gen hc_final_weight = rme_no_WP*final_weight;

	sort pf_ind year;
	foreach var of local meas {;
		/* Denominators: weight totals over the firms with a valid value. */
		by pf_ind year: egen sum_wgt = total(final_weight) if `var'<.;
		by pf_ind year: egen sum_ftewgt = total(fte_final_weight) if `var'<.;
		by pf_ind year: egen sum_hcwgt = total(hc_final_weight) if `var'<.;

		/* Per-firm contributions, created directly under their final names. */
		gen `var'`suff' = `var'*final_weight/sum_wgt;
		gen `var'_fte`suff' = `var'*fte*final_weight/sum_ftewgt;
		gen `var'_hc`suff' = `var'*rme_no_WP*final_weight/sum_hcwgt;

		/* 1 when the firm actually contributes to the corresponding average. */
		gen byte nv_`var'`suff' = `var'`suff'<.;
		gen byte nv_`var'_fte`suff' = `var'_fte`suff'<.;
		gen byte nv_`var'_hc`suff' = `var'_hc`suff'<.;

		drop sum_wgt sum_ftewgt sum_hcwgt;
	};

	/* Weighted employment counts by occupation; turned into shares after the collapse. */
	foreach emp in manag tech trad other {;
		gen emp_`emp'`suff' = emp_`emp'*final_weight;
		gen emp_`emp'_fte`suff' = emp_`emp'*fte*final_weight;
		gen emp_`emp'_hc`suff' = emp_`emp'*rme_no_WP*final_weight;
	};

	collapse (sum) *`suff', by(year pf_ind);

	/* Undo the zero that collapse (sum) gives to cells without any contribution. */
	foreach var of local meas {;
		foreach v in `var'`suff' `var'_fte`suff' `var'_hc`suff' {;
			replace `v' = . if nv_`v'==0;
			drop nv_`v';
		};
	};

	/* Labels and notes for the collapsed measures, one pass per weighting.
	   NOTE(review): with the longer occupation descriptions some of these
	   labels exceed 80 characters once the weighting note is appended; Stata
	   truncates labels at 80 characters, so the weighting note may be cut off.
	   Confirm and shorten the text if the distinction matters. */
	foreach post in "" _fte _hc {;

		if "`post'"=="" local note "";
		if "`post'"=="_fte" local note " (fte wgtd)";
		if "`post'"=="_hc" local note " (hc wgtd)";

		label var low_comp`post'`suff' "Frac w/ no competition, or 1-2 competitors`note'";
		label var low_comp2`post'`suff' "Fract w/ no competn, or 1-2 competrs, among valid resp`note'";
		label var vlow_comp`post'`suff' "Fract w/ no competition`note'";
		label var vlow_comp2`post'`suff' "Fract w/ no competition, among valid resp`note'";
		label var mlow_comp`post'`suff' "Fract w/ 1-2 competitors`note'";
		label var mlow_comp2`post'`suff' "Fract w/ 1-2 competitors, among valid resp`note'";
		label var high_comp`post'`suff' "Fract w/ many competitors, none dominant`note'";
		label var high_comp2`post'`suff' "Fract w/ many competrs, none dominant, among valid resp`note'";
		label var avg_comp`post'`suff' "Average level of competition, among valid resp`note'";
		notes avg_comp`post'`suff': Scale is 1 (no competition) to 4 (many competitors, none dominant).;

		foreach prod in prody profity {;

			if "`prod'"=="prody" local prodl "productivity";
			if "`prod'"=="profity" local prodl "profitability";

			label var high_`prod'`post'`suff' "Fract w/ higher `prodl' than competitors`note'";
			label var high_`prod'2`post'`suff' "Fract w/ higher `prodl' than comprs, among valid resp`note'";
			label var low_`prod'`post'`suff' "Fract w/ lower `prodl' than competitors`note'";
			label var low_`prod'2`post'`suff' "Fract w/ lower `prodl' than comprs, among valid resp`note'";
			label var avg_`prod'`post'`suff' "Avg relative level of `prodl', among valid resp`note'";
			notes avg_`prod'`post'`suff': Scale is 1 (lower than) to 3 (higher than).;
		};

		foreach rec in manag tech trad other {;

			if "`rec'"=="manag" local recl "managers and professionals";
			if "`rec'"=="tech" local recl "technicians and associate professionals";
			if "`rec'"=="trad" local recl "tradespeople and related workers";
			if "`rec'"=="other" local recl "other occupations";

			label var avg_rec_`rec'`post'`suff' "Av diffic hiring `recl', (val resp)`note'";
			notes avg_rec_`rec'`post'`suff': Scale is 1 (no difficulty), 2 (moderate difficulty), 3 (severe difficulty);
			label var drec_`rec'`post'`suff' "Fract with any diffic hiring `recl', (val resp)`note'";
			label var srec_`rec'`post'`suff' "Fract with severe diffic hiring `recl', (val resp)`note'";
		};

		label var avg_collec_emp`post'`suff' "Avg % of emp in collective emp agreements, (val resp)`note'";
		label var pos_collec_emp`post'`suff' "Fract of firms w/ any emp in collec. agreements, (val resp)`note'";

		label var frac_internat`post'`suff' "Avg fraction of firm foreign owned, (val resp)`note'";
		label var pos_internat`post'`suff' "Fract of firms w/ any foreign ownership, (val resp)`note'";
	};

	/* Occupation shares of (weighted) employment within each cell. */
	gen wgt_emp_tot = emp_manag`suff' + emp_tech`suff' + emp_trad`suff' + emp_other`suff';
	gen ftewgt_emp_tot = emp_manag_fte`suff' + emp_tech_fte`suff' + emp_trad_fte`suff' + emp_other_fte`suff';
	gen hcwgt_emp_tot = emp_manag_hc`suff' + emp_tech_hc`suff' + emp_trad_hc`suff' + emp_other_hc`suff';

	foreach emp in manag tech trad other {;
		gen empfrac_`emp'`suff' = emp_`emp'`suff'/wgt_emp_tot;
		gen empfrac_fte_`emp'`suff' = emp_`emp'_fte`suff'/ftewgt_emp_tot;
		gen empfrac_hc_`emp'`suff' = emp_`emp'_hc`suff'/hcwgt_emp_tot;

		if "`emp'"=="manag" local recl "managers and professionals";
		if "`emp'"=="tech" local recl "technicians and associate professionals";
		if "`emp'"=="trad" local recl "tradespeople and related workers";
		if "`emp'"=="other" local recl "other occupations";

		label var empfrac_`emp'`suff' "Fraction of employees `recl'";
		label var empfrac_fte_`emp'`suff' "Fraction of employees `recl', (fte wgtd)";
		label var empfrac_hc_`emp'`suff' "Fraction of employees `recl', (hc wgtd)";

		drop emp_`emp'`suff' emp_`emp'_fte`suff' emp_`emp'_hc`suff';
	};
	drop wgt_emp_tot ftewgt_emp_tot hcwgt_emp_tot;

	/* Employment-share-weighted composite of the four occupation averages.
	   An occupation with a zero employment share contributes nothing even if
	   its average is missing. An occupation with a positive share and no valid
	   responses makes the composite missing rather than silently counting as 0. */
	gen avg_rec_hc`suff' =
		  cond(empfrac_hc_manag`suff'==0, 0, avg_rec_manag_hc`suff'*empfrac_hc_manag`suff')
		+ cond(empfrac_hc_tech`suff'==0, 0, avg_rec_tech_hc`suff'*empfrac_hc_tech`suff')
		+ cond(empfrac_hc_trad`suff'==0, 0, avg_rec_trad_hc`suff'*empfrac_hc_trad`suff')
		+ cond(empfrac_hc_other`suff'==0, 0, avg_rec_other_hc`suff'*empfrac_hc_other`suff');
	label var avg_rec_hc`suff' "Average recruitment difficulty (1-3 scale)";
	notes avg_rec_hc`suff': A weighted average of the avg difficulty hiring each occupation type
		in the industry, where the weights are fractions of employees in each occupation.;

	save temp`suff', replace;
};

/* Put the three samples side by side, one row per industry and year. All
   rows are kept whether or not they appear in every sample (the default
   behaviour of merge). */
use temp_all, clear;
foreach part in lge lg5 {;
	merge 1:1 year pf_ind using temp_`part', nogenerate;
};

/* Dataset-level documentation. */
notes: _all variables are calculated for all firms in the industry, _lge variables
are calculated for firms with 10+ FTE, _lg5 variables are calculated for firms
with 5+ head count excl WP. These are firm averages, adjusting for
sampling weights, and are not otherwise weighted by firm size.;
notes: _fte_all, _fte_lge, and _fte_lg5 variables are the same as the _all, _lge & _lg5 variables,
except the averages are weighted by firm ftes and sampling weights. Similarly, 
_hc_all, _hc_lge, and _hc_lg5 variables are weighted by firm head count and sampling weights;

compress year;

notes: GWgap_compet_v3.dta was created by GWgap_compet_v3.do, from csv file Wgap_compet_v3.csv.
An observation is a pf_ind industry in a year (2005-2013). The data are
aggregates from BOS that capture the level of competition and average views of
relative productivity and profitability in the industry.;

/* Of the employment-share variables only the head-count-weighted ones for the
   5+ head count sample (empfrac_hc_*_lg5) are kept. */
drop empfrac_*_all empfrac_*_lge;
foreach occ in manag tech trad other {;
	drop empfrac_`occ'_lg5;
};
drop empfrac_fte*;

save GWgap_compet_v3, replace;





